from IPython.display import HTML
# Render a form button that shows/hides the notebook's code cells.
# The embedded script toggles every `div.input` element through `$`
# (presumably the jQuery instance supplied by the notebook front end —
# confirm for other viewers). `code_toggle` is also registered as the
# document-ready handler, and `code_show` starts as true, so the code
# inputs are hidden as soon as the page has loaded.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Please click here to toggle on/off the code"></form>''')
import pandas as pd
import numpy as np
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import warnings
import dython
from collections import Counter
import scipy.stats as ss
warnings.filterwarnings('ignore')
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from wordcloud import WordCloud, STOPWORDS
# Models
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from scipy import stats
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from scipy import interp
from itertools import cycle
from sklearn.metrics import confusion_matrix
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
import warnings
warnings.filterwarnings('ignore')
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) <ipython-input-9-f3c1f613636c> in <module> 17 import seaborn as sns 18 import numpy as np ---> 19 from wordcloud import WordCloud, STOPWORDS 20 21 # Models ModuleNotFoundError: No module named 'wordcloud'
# Mount Google Drive only when the notebook runs inside Google Colab.
# The `google.colab` package is not installed anywhere else, so an
# unconditional import would abort a local run before the dataset (which is
# read from a local path further down) is ever loaded.
try:
    from google.colab import drive
except ImportError:
    # Not on Colab: nothing to mount; keep the name defined for later checks.
    drive = None
else:
    drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Road accidents have been a critical problem: every year more than 1.2 million people die in road crashes across the globe. There is a pressing need to make use of the available data to understand the underlying causes of the problem. Road safety issues are complex, and there are significant differences in policies within and across countries. In this analysis, data from the Metropolitan Police Department's (MPD) crash data management system (COBALT) is studied to find relationships between fatality and the independent features. The crash data covers Washington, DC.
Each year, more than 1.2 million people die across the globe due to road crashes; there is a pressing need to understand the underlying causes of the problem. Road safety issues are complex and multi-sectoral, involving everyone from the public and stakeholders to policy makers. Significant differences exist both across and within countries, and therefore policies and interventions need to be adapted to the local environment. Effective interventions require a multi-disciplinary approach that includes enforcement, engineering, psychological, and educational measures. Because resources are limited, road safety interventions must address not only the sustainability of their outcomes but also the cost-effectiveness of implementing and maintaining them. More importantly, interventions must be evidence-based and evaluated over time before they are translated into policy. Hence, research cannot be done in silos if it is to address the complexity of road safety issues. For sustainability, the development and implementation of road safety interventions need to be guided and governed by policy.
# Load the tab-separated crash-details table into `data`.
#
# Alternative location when the file lives on a mounted Google Drive:
# data = pd.read_csv('drive/MyDrive/Crash_Details_Table.csv', delimiter='\t')
#
# low_memory=False makes pandas read the whole file before inferring dtypes.
# With the default chunked parsing, this file triggered a DtypeWarning
# ("Columns (2) have mixed types"), i.e. the same column ended up holding
# values of different Python types depending on the chunk they came from.
data = pd.read_csv('Crash_Details_Table.csv', delimiter='\t', low_memory=False)
data.head()
C:\Users\admin\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py:3146: DtypeWarning: Columns (2) have mixed types.Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result)
| | id | crime id | ccn | person id | person type | age | fatal | major injury | minor injury | vehicle id | vehicle type | ticket issued | License state | impaired | speeding |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 438194351 | 26872544 | 16034312 | 84628234 | Passenger | 31.0 | N | N | N | 2275009 | Passenger Car/automobile | N | VA | Y | N |
| 1 | 438194352 | 26872544 | 16034312 | 84833902 | Passenger | 31.0 | N | N | N | 2275009 | Passenger Car/automobile | N | VA | Y | N |
| 2 | 438194353 | 26872544 | 16034312 | 84938064 | Driver | NaN | N | N | N | 2275007 | Passenger Car/automobile | N | None | N | N |
| 3 | 438194354 | 26872544 | 16034312 | 84790164 | Driver | 31.0 | N | N | N | 2275009 | Passenger Car/automobile | N | VA | N | N |
| 4 | 438194355 | 26872544 | 16034312 | 84953497 | Passenger | 47.0 | N | N | Y | 2275008 | Passenger Car/automobile | N | VA | Y | N |
# Summary statistics of the numeric columns. In the recorded output `age`
# spans -7990 to 237, so the column contains values that cannot be real ages.
data.describe()
| | id | crime id | person id | age |
|---|---|---|---|---|
| count | 5.963810e+05 | 5.963810e+05 | 5.963810e+05 | 426744.000000 |
| mean | 4.384924e+08 | 2.672116e+07 | 8.506922e+07 | 38.668302 |
| std | 1.721813e+05 | 1.238390e+06 | 8.613766e+06 | 20.897059 |
| min | 4.370014e+08 | 2.341134e+07 | 1.045383e+07 | -7990.000000 |
| 25% | 4.383433e+08 | 2.532167e+07 | 8.474899e+07 | 27.000000 |
| 50% | 4.384924e+08 | 2.680585e+07 | 8.497752e+07 | 37.000000 |
| 75% | 4.386415e+08 | 2.769386e+07 | 8.712287e+07 | 51.000000 |
| max | 4.387906e+08 | 2.872803e+07 | 9.077153e+07 | 237.000000 |
# Treat ages outside a plausible human range as missing, then plot the
# distribution. The summary statistics of this dataset show a minimum of
# -7990 and a maximum of 237, so both tails contain data-entry errors; the
# lower cut-off alone would leave the impossible high values in the plot.
MIN_VALID_AGE = 1    # same lower cut-off as before: ages below 1 become NaN
MAX_VALID_AGE = 120  # NOTE(review): upper bound picked during review — confirm

# `between` is inclusive on both ends and evaluates to False for NaN, so
# already-missing ages stay missing after `where`.
age_is_plausible = data['age'].between(MIN_VALID_AGE, MAX_VALID_AGE)
data['age'] = data['age'].where(age_is_plausible)

# Histogram of the cleaned ages with a marginal box plot above it.
fig = px.histogram(data, x="age", marginal="box")
fig.show()